Admin

Load packages

library(tidyverse)
library(sf)
library(osmdata)
library(ggmap)

library(httr)
library(jsonlite)

# unload packages
# detach("package:microbenchmark", unload=TRUE)

Explore OSM data

Resources

Vignette: https://cran.r-project.org/web/packages/osmdata/vignettes/osmdata.html

Map features: https://wiki.openstreetmap.org/wiki/Map_Features

?available_features() and ?available_tags([feature])

Baltimore

Query parameters

List of parameters:

  • City name
  • key
  • value
Balt_bbox <- getbb("Baltimore") %>% 
  opq()

Transit

Query the bus stops in Baltimore.

Balt_busStops <- Balt_bbox %>% 
  add_osm_feature(key = "highway", value = "bus_stop") %>% 
  osmdata_sf() %>% 
  # keep only the points. Note that the query returned 4333 points, 9 polygons, and 1 multi-line feature
  .$osm_points

What’s in the bus stops data?

glimpse(Balt_busStops)
## Rows: 4,333
## Columns: 31
## $ osm_id                        <chr> "37751877", "393491959", "414569919",...
## $ name                          <chr> "Philadelphia Road & Fontana Lane Opp...
## $ CCC.CORNER                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ amenity                       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ bench                         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ bin                           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ brand                         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ brand.wikidata                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ bus                           <chr> "yes", "yes", NA, "yes", NA, "yes", N...
## $ commuter_bus                  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ covered                       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ highway                       <chr> "bus_stop", "bus_stop", "bus_stop", "...
## $ intercity_bus                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ lit                           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ local_ref                     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ name.zh                       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ network                       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ note                          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ operator                      <chr> "MDOT Maryland Transit Administration...
## $ passenger_information_display <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ public_transport              <chr> "stop_position", "stop_position", NA,...
## $ ref                           <chr> "6632", "437", NA, NA, NA, NA, NA, NA...
## $ route_ref                     <chr> "36;56", "104", NA, NA, NA, NA, NA, N...
## $ shelter                       <chr> "yes", "no", NA, NA, NA, NA, NA, NA, ...
## $ shelter_type                  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ source                        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ surface                       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ tactile_paving                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ website                       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ wheelchair                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ geometry                      <POINT [°]> POINT (-76.488 39.33856), POINT...

Make a quick map

Balt_map <- get_map(getbb("Baltimore"), maptype = "toner-background")
ggmap(Balt_map) +
  geom_sf(data = Balt_busStops,
          inherit.aes = FALSE, # this is necessary
          alpha = 0.5) +
  labs(title = "Bus Stops in Baltimore",
       caption = "Data source: OSM",
       x = "",
       y = "")

Query some historical data

API admin

base_url <- "https://api.ohsome.org/v0.9"
api_metadata <- GET(url= paste(base_url, "/metadata", sep = "")) %>% 
  content("text") %>% 
  fromJSON()

The below tells us the database contains data from October 8, 2007 to May 23, 2020.

api_metadata$extractRegion$temporalExtent
## $fromTimestamp
## [1] "2007-10-08T00:00:00Z"
## 
## $toTimestamp
## [1] "2020-05-23T03:00:00Z"

Aggregation endpoint

Use this endpoint to aggregate OSM data, e.g. counts, areas, lengths, and users (contributors).

Let’s look at the trends for mapping bus stops in Baltimore.

# api_bbox <- getbb("Baltimore") %>% bbox_to_string() # note that the order of the coords is flipped from what the database needs
balt_bbox <- "-76.770759, 39.1308816,-76.450759,39.4508816"
# api_bbox <- "8.6581,49.3836,8.7225,49.4363"

api_keys <- "highway"
api_values <- "bus_stop"

monthly <- "2007-11-01/2020-05-23/P1M"

api_data <- GET(url = paste(base_url, "/elements/count", sep = ""),
                query = list(
                  bboxes = balt_bbox,
                  keys = api_keys,
                  values = api_values,
                  time = monthly))

busStops_hist <- content(api_data, as = "text") %>% 
  fromJSON() %>% 
  .$result

The query from osmdata showed 4333 bus stops in Baltimore currently. The OSHDB query shows 4241 bus stops as of May 1, 2020.

ggplot(busStops_hist, 
       aes(x = as.Date(timestamp),
           y = value)) +
  geom_line() +
  theme_bw() +
  labs(title = "Bus Stops in Baltimore Mapped In OSM Over Time",
       caption = "Source: OSHDB, ohsome API",
       x = "Date",
       y = "Count")

Extraction endpoint

Use this endpoint to pull historical snapshots of OSM features.

Code below is not the most elegant and the resulting dataframe seems odd - is there a better way to convert the API geoJSON response into sf?

# is there a more elegant way to do the below?
file <- tempfile(fileext = ".geojson")
extraction_api_data <- GET(url= paste(base_url, "/elements/geometry", sep = ""),
                query = list(
                  bboxes = balt_bbox,
                  keys = api_keys,
                  values = api_values,
                  types = "point", # do I want to limit it to points only?
                  time = monthly),
                httr::write_disk(file))
busStops_geom_hist <- sf::read_sf(file)
busStops_geom_hist$X.snapshotTimestamp <- as.Date(busStops_geom_hist$X.snapshotTimestamp)

busStops_geom_hist
## Simple feature collection with 84352 features and 3 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -76.77029 ymin: 39.13266 xmax: -76.45081 ymax: 39.44865
## geographic CRS: WGS 84
## # A tibble: 84,352 x 4
##    X.osmId        X.snapshotTimestamp highway              geometry
##    <chr>          <date>              <chr>             <POINT [°]>
##  1 node/663402108 2010-04-01          bus_stop (-76.66353 39.37646)
##  2 node/663402108 2010-05-01          bus_stop (-76.66353 39.37646)
##  3 node/663402108 2010-06-01          bus_stop (-76.66353 39.37646)
##  4 node/663402108 2010-07-01          bus_stop (-76.66353 39.37646)
##  5 node/663402108 2010-08-01          bus_stop (-76.66353 39.37646)
##  6 node/663402108 2010-09-01          bus_stop (-76.66352 39.37636)
##  7 node/663402108 2010-10-01          bus_stop (-76.66352 39.37636)
##  8 node/663402108 2010-11-01          bus_stop (-76.66352 39.37636)
##  9 node/663402108 2010-12-01          bus_stop (-76.66352 39.37636)
## 10 node/663402108 2011-01-01          bus_stop (-76.66352 39.37636)
## # ... with 84,342 more rows
# busStops_geom_hist <- content(extraction_api_data, as = "text") %>%
#   fromJSON()

# write(busStops_geom_hist %>% toJSON(), "test.json")

According to our aggregated data, there was a massive spike in bus stops in Baltimore on OSM at the beginning of February, 2019: from 280 to 3968.

First, does the geometry data have the same number of observations as the aggregated data? It’s very close - it may be a matter of the time of day queried.

busStops_changeMap <- busStops_geom_hist %>% 
  filter(X.snapshotTimestamp %in% as.Date(c("2019-02-01", "2019-01-01")))

busStops_changeMap %>% group_by(X.snapshotTimestamp) %>% 
  summarize(count = n())
## Simple feature collection with 2 features and 2 fields
## geometry type:  MULTIPOINT
## dimension:      XY
## bbox:           xmin: -76.77029 ymin: 39.13266 xmax: -76.45081 ymax: 39.44865
## geographic CRS: WGS 84
## # A tibble: 2 x 3
##   X.snapshotTimesta~ count                                              geometry
##   <date>             <int>                                      <MULTIPOINT [°]>
## 1 2019-01-01           272 ((-76.7639 39.18362), (-76.76289 39.31459), (-76.761~
## 2 2019-02-01          3960 ((-76.77029 39.41797), (-76.77025 39.41795), (-76.77~

Next, compare to two months side-by-side on a map.

ggmap(Balt_map) +
  geom_sf(data = busStops_changeMap,
          inherit.aes = FALSE, 
          alpha = 0.5) +
  labs(title = "Change in Bus Stops Mapped on OSM in Baltimore",
       subtitle = "Number of bus stops jumped from 280 in January 2019 to 3968 in February 2019.",
       caption = "Data source: OSHDB",
       x = "",
       y = "") +
  facet_wrap(~ X.snapshotTimestamp)